{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 时间序列基础\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from datetime import datetime\n",
    "from datetime import timedelta\n",
    "from dateutil.parser import parse\n",
    "from pandas import DataFrame, Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n",
      "<class 'pandas.core.indexes.datetimes.DatetimeIndex'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2011-01-02    0.772477\n",
       "2011-01-05    1.753706\n",
       "2011-01-07    0.687584\n",
       "2011-01-08    1.050631\n",
       "2011-01-10   -1.166636\n",
       "2011-01-12   -1.201186\n",
       "dtype: float64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = [datetime(2011, 1, 2),\n",
    "         datetime(2011, 1, 5),\n",
    "         datetime(2011, 1, 7),\n",
    "         datetime(2011, 1, 8),\n",
    "         datetime(2011, 1, 10),\n",
    "         datetime(2011, 1, 12)]\n",
    "ts = Series(np.random.randn(6), index=dates)\n",
    "print(type(ts)) # 注意，和书上说的TimeSeries不一样。\n",
    "print(type(ts.index))\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011-01-02    1.544953\n",
       "2011-01-05         NaN\n",
       "2011-01-07    1.375168\n",
       "2011-01-08         NaN\n",
       "2011-01-10   -2.333272\n",
       "2011-01-12         NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts + ts[::2] # 无法对齐的地方自动填充NA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('<M8[ns]')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.index.dtype # 以ns为单位保存时间戳"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2011-01-02 00:00:00')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stamp = ts.index[0]\n",
    "stamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 索引、选取、子集构造"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.1666361345741005"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts['1/10/2011']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.1666361345741005"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts['20110110']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2000-01-01    0.535422\n",
      "2000-01-02    0.218639\n",
      "2000-01-03   -0.798365\n",
      "2000-01-04    1.438230\n",
      "2000-01-05    1.365312\n",
      "Freq: D, dtype: float64\n",
      "2002-09-22   -0.180060\n",
      "2002-09-23   -0.270658\n",
      "2002-09-24   -0.963958\n",
      "2002-09-25   -0.499083\n",
      "2002-09-26    0.598380\n",
      "Freq: D, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "longer_ts = Series(np.random.randn(1000),\n",
    "                   index=pd.date_range('1/1/2000', periods=1000)) # 连续1000天的数据\n",
    "print(longer_ts.head())\n",
    "print(longer_ts.tail())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2001-01-01    0.562310\n",
       "2001-01-02    0.256819\n",
       "2001-01-03   -1.480738\n",
       "2001-01-04   -1.004320\n",
       "2001-01-05    0.318483\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "longer_ts['2001'].head() # 直接选年份"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2001-05-01    0.871160\n",
       "2001-05-02    0.664891\n",
       "2001-05-03   -0.472416\n",
       "2001-05-04   -0.626066\n",
       "2001-05-05   -0.594843\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "longer_ts['2001-05'].head() # 年 + 月"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011-01-02   -1.027031\n",
       "2011-01-05   -0.055124\n",
       "2011-01-07   -1.830234\n",
       "2011-01-08    2.434101\n",
       "2011-01-10    0.404510\n",
       "2011-01-12   -0.593850\n",
       "dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = [datetime(2011, 1, 2),\n",
    "         datetime(2011, 1, 5),\n",
    "         datetime(2011, 1, 7),\n",
    "         datetime(2011, 1, 8),\n",
    "         datetime(2011, 1, 10),\n",
    "         datetime(2011, 1, 12)] # 重新构造一遍，便于查询数据\n",
    "ts = Series(np.random.randn(6), index=dates)\n",
    "ts[datetime(2011, 1, 7):] # 为什么切片只返回4天？\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011-01-07   -1.830234\n",
       "2011-01-08    2.434101\n",
       "2011-01-10    0.404510\n",
       "2011-01-12   -0.593850\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts['1/6/2011':'1/20/2011'] # 1/6和1/20不存在没关系，自动会查。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011-01-02   -1.027031\n",
       "2011-01-05   -0.055124\n",
       "2011-01-07   -1.830234\n",
       "2011-01-08    2.434101\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.truncate(after='1/8/2011') # 最远到2011/1/8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Colorado</th>\n",
       "      <th>Texas</th>\n",
       "      <th>New York</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2000-01-05</th>\n",
       "      <td>-0.341666</td>\n",
       "      <td>-1.231550</td>\n",
       "      <td>-1.016394</td>\n",
       "      <td>-1.301498</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2000-01-12</th>\n",
       "      <td>0.744188</td>\n",
       "      <td>0.186536</td>\n",
       "      <td>-1.014354</td>\n",
       "      <td>-0.497517</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2000-01-19</th>\n",
       "      <td>0.273858</td>\n",
       "      <td>-2.213061</td>\n",
       "      <td>1.391597</td>\n",
       "      <td>1.384184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2000-01-26</th>\n",
       "      <td>0.859288</td>\n",
       "      <td>0.995682</td>\n",
       "      <td>-0.511652</td>\n",
       "      <td>-0.617411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2000-02-02</th>\n",
       "      <td>0.019173</td>\n",
       "      <td>0.207258</td>\n",
       "      <td>-0.474774</td>\n",
       "      <td>-0.428453</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Colorado     Texas  New York      Ohio\n",
       "2000-01-05 -0.341666 -1.231550 -1.016394 -1.301498\n",
       "2000-01-12  0.744188  0.186536 -1.014354 -0.497517\n",
       "2000-01-19  0.273858 -2.213061  1.391597  1.384184\n",
       "2000-01-26  0.859288  0.995682 -0.511652 -0.617411\n",
       "2000-02-02  0.019173  0.207258 -0.474774 -0.428453"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n",
    "long_df = DataFrame(np.random.randn(100, 4),\n",
    "                    index=dates,\n",
    "                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
    "long_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Colorado</th>\n",
       "      <th>Texas</th>\n",
       "      <th>New York</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2001-05-02</th>\n",
       "      <td>1.267190</td>\n",
       "      <td>-0.556569</td>\n",
       "      <td>-0.722254</td>\n",
       "      <td>-2.236654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001-05-09</th>\n",
       "      <td>-0.685621</td>\n",
       "      <td>1.460324</td>\n",
       "      <td>0.942097</td>\n",
       "      <td>0.950523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001-05-16</th>\n",
       "      <td>1.059505</td>\n",
       "      <td>-1.452266</td>\n",
       "      <td>1.687294</td>\n",
       "      <td>0.636464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001-05-23</th>\n",
       "      <td>-0.194557</td>\n",
       "      <td>-0.142167</td>\n",
       "      <td>1.460389</td>\n",
       "      <td>-0.284870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001-05-30</th>\n",
       "      <td>-1.556288</td>\n",
       "      <td>-2.129977</td>\n",
       "      <td>0.125112</td>\n",
       "      <td>-1.470031</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Colorado     Texas  New York      Ohio\n",
       "2001-05-02  1.267190 -0.556569 -0.722254 -2.236654\n",
       "2001-05-09 -0.685621  1.460324  0.942097  0.950523\n",
       "2001-05-16  1.059505 -1.452266  1.687294  0.636464\n",
       "2001-05-23 -0.194557 -0.142167  1.460389 -0.284870\n",
       "2001-05-30 -1.556288 -2.129977  0.125112 -1.470031"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "long_df.loc['5-2001'] # 2001年5月"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 带有重复索引的时间序列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dates = pd.DatetimeIndex(['1/1/2000',\n",
    "                          '1/2/2000',\n",
    "                          '1/2/2000',\n",
    "                          '1/2/2000',\n",
    "                          '1/3/2000'])\n",
    "dup_ts = Series(np.arange(5), index=dates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dup_ts.index.is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dup_ts['1/3/2000'] # 不重复"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-02    1\n",
       "2000-01-02    2\n",
       "2000-01-02    3\n",
       "dtype: int32"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dup_ts['1/2/2000'] # 重复"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2000-01-01    0\n",
      "2000-01-02    2\n",
      "2000-01-03    4\n",
      "dtype: int32\n"
     ]
    }
   ],
   "source": [
    "grouped = dup_ts.groupby(level=0)\n",
    "print(grouped.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-01    1\n",
       "2000-01-02    3\n",
       "2000-01-03    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.count()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
